#!/usr/bin/python3
# -*- coding:utf-8 -*-
# @Author: vicnic
# @Date: 2018-09-10 10:02:32

import Tools as tl
import Settings as st
from enum import Enum, unique
import json
import requests

@unique  # @unique guarantees that no two members share the same value.
class WebSite(Enum):
    """Supported target sites, one member per scraping strategy."""

    BILIBILI = 1   # bilibili.com article columns
    LIKEGRILS = 2  # photo gallery site http://www.xhmn.net/
    HAOQIXIN = 3   # Qdaily news, http://www.qdaily.com/
    KUAIZIXUN = 4  # "Kuai Zixun" news feed
    MEIZITU = 5    # UC "meizitu" photo pages
    MEITULU = 6    # gallery site www.meitulu.com

def startSpider(url_list, web_type, isJson):
    """Run the spider over every URL in *url_list*.

    Each URL is dispatched either to the JSON pipeline (``analyzeJson``)
    or to the HTML pipeline (parse with lxml, then ``analyzeTree``),
    depending on *isJson*.
    """
    for url in url_list:
        if isJson:
            analyzeJson(url, web_type)
        else:
            analyzeTree(tl.getHtmlTrees(url), web_type)


def analyzeTree(tree, web_type):
    """Extract and download images (and, for article sites, body text)
    from an already-parsed HTML tree.

    :param tree: lxml element tree returned by ``tl.getHtmlTrees``.
    :param web_type: a :class:`WebSite` member selecting the site-specific
        xpath rules.

    Side effects: downloads files via ``tl.downLoadFile`` and writes
    article text files under ``st.resorce_save_path``.
    """
    # Bilibili column article: images + paragraph text.
    if web_type == WebSite.BILIBILI:
        img_list = tree.xpath('//figure[@class="img-box"]/img/@data-src')
        plist = tree.xpath('//div[@class="article-holder"]/p/text()')
        for e in img_list:
            # data-src values are scheme-relative (start with "//").
            tl.downLoadFile('http:' + e)
        # BUG FIX: original ended with `w.close` (no parentheses), which is a
        # no-op; a context manager guarantees the file is closed.
        with open(st.resorce_save_path + 'TinySpider_BiLiBiLi.txt', 'wb+') as w:
            for e in plist:
                w.write(e.encode('utf8'))
                w.write('\r\n\r\n'.encode('utf8'))
    # xhmn.net gallery: the site uses several layouts, try each in turn.
    elif web_type == WebSite.LIKEGRILS:
        img_list = tree.xpath('//div[@class="show_tu"]/div/a/img/@src')
        if not img_list:
            img_list = tree.xpath('//div[@class="show_tu"]/div/a/span/span/img/@src')
        if not img_list:
            img_list = tree.xpath('//div[@class="show_tu"]/img/@src')
        # BUG FIX: the original only downloaded inside the nested fallback
        # branches, so pages matched by the FIRST xpath were silently skipped.
        for e in img_list:
            # src values are relative to the site root.
            tl.downLoadFile('http://www.xhmn.net/' + e)
    # Qdaily article: paragraph text plus two image-container variants.
    elif web_type == WebSite.HAOQIXIN:
        plist = tree.xpath('//div[@class="article-detail-bd"]/div[@class="detail"]/p/text()')
        img_list = tree.xpath('//div[@class="com-insert-images"]/figure/img/@data-src')
        img2_list = tree.xpath('//div[@class="com-insert-images medium-insert-active"]/figure/img/@data-src')
        url_re = r'((http|https):\/\/)+(\w+\.)+(\w+)[\w\/\.\-]*(jpg|gif|png)'
        for e in img2_list + img_list:
            # BUG FIX: the original computed pic_url and then downloaded the
            # raw attribute `e`, leaving pic_url unused. Prefer the cleaned
            # URL; fall back to the raw value when the regex finds nothing.
            # NOTE(review): assumes tl.getContenByRE returns a falsy value on
            # no match — confirm against Tools.py.
            pic_url = tl.getContenByRE(e, url_re)
            tl.downLoadFile(pic_url if pic_url else e)
        with open(st.resorce_save_path + 'TinySpider_haoqixin.txt', 'wb+') as w:
            for e in plist:
                w.write(e.encode('utf8'))
                w.write('\r\n\r\n'.encode('utf8'))
    # "Kuai Zixun" feed: several DOM variants for both text and images.
    elif web_type == WebSite.KUAIZIXUN:
        plist = tree.xpath('//div[@class="content"]/div/p/text()')
        img_list = tree.xpath('//div[@class="content"]/div/div/img/@src')
        if not img_list:
            if not plist:
                plist = tree.xpath('//div[@class="content"]/p/text()')
            img_list = tree.xpath('//div[@class="content"]/img/@src')
        if not img_list:
            img_list = tree.xpath('//div[@class="content"]/p/span/img/@src')
        # BUG FIX: the original called `w.close` (no parens) INSIDE the write
        # loop; write everything, then let the context manager close the file.
        with open(st.resorce_save_path + 'TinySpider_kuaizixun.txt', 'wb+') as w:
            for e in plist:
                w.write(e.encode('utf8'))
                w.write('\r\n\r\n'.encode('utf8'))
        for e in img_list:
            tl.downLoadFile(e)
    # UC "meizitu": absolute image URLs inside a <noscript> fallback.
    elif web_type == WebSite.MEIZITU:
        img_list = tree.xpath('//div[@id="post_content"]/noscript/img/@src')
        for e in img_list:
            tl.downLoadFile(e)
    
def analyzeTreeByUrl(url, web_type):
    """Download a complete multi-page gallery starting from *url*.

    Only :attr:`WebSite.MEITULU` is supported. The first page is
    ``<base>.html`` and subsequent pages are ``<base>_2.html``,
    ``<base>_3.html``, … until a page fails to parse.

    :param url: URL of the gallery's first page (must end in ``.html``).
    :param web_type: a :class:`WebSite` member; anything other than
        ``MEITULU`` is a no-op.
    """
    if web_type == WebSite.MEITULU:
        tree = tl.getHtmlTrees(url)
        # Strip ".html" to obtain the base for the "_<n>.html" page scheme.
        base_url = url.split('.html')[0]
        downLoadPageTreeMeiTuLu(tree)
        index = 2
        while True:
            page_url = base_url + '_%s.html' % index
            page_tree = tl.getHtmlTrees(page_url)
            if page_tree is None:
                print('到末尾了1')
                break
            downLoadPageTreeMeiTuLu(page_tree)
            # BUG FIX: the original never incremented `index`, so it fetched
            # page 2 forever instead of advancing through the gallery.
            index += 1

def downLoadPageTreeMeiTuLu(tree):
    """Download every gallery image found on one meitulu.com page.

    Prints an end-of-gallery marker when the page contains no images.
    """
    srcs = tree.xpath('//div[@class="content"]/center/img/@src')
    if srcs:
        for src in srcs:
            tl.downLoadFile(src)
    else:
        print('到末尾了2')

        
if __name__ == '__main__':
    # Demo run: crawl one Bilibili article via the HTML (non-JSON) pipeline.
    targets = ['https://www.bilibili.com/read/cv1290566?from=category_0']
    startSpider(targets, WebSite.BILIBILI, False)
    # analyzeTreeByUrl('https://www.meitulu.com/item/15444.html',WebSite.MEITULU)
